
cd $sectorpath 

// 1- prepare the data
// here we merge with different covariates, taking care of the nomenclature 

// Rauch classification at the 3-digit SITC Rev.2 level, saved as a lookup
// keyed on sitc2_3d (used as a fallback when the 4-digit code has no match).
use rauch_sitc2.dta , clear
// A code whose 4th character is "0" is taken to be a 3-digit heading.
// substr(s,1,4) returns the first FOUR characters, so comparing it with "0"
// could never select a 4-character code; the 4th character is substr(s,4,1).
// NOTE(review): assumes `sitc` was an abbreviation of sitc2 and that sitc2 is
// a 4-character string -- confirm against rauch_sitc2.dta.
generate sitc2_3d = substr(sitc2,1,3) if substr(sitc2,4,1)=="0"
// String missing is "" (not "."): drop the rows that are not 3-digit headings.
keep if sitc2_3d!=""
keep con lib sitc2_3d
// Guarantee a unique key for the later m:1 merge.
duplicates drop sitc2_3d, force
sort sitc2_3d 
save rauch_sitc2_3digit.dta, replace

// Rauch classification keyed on the 4-digit SITC Rev.2 code (sitc2_4d).
use rauch_sitc2.dta, clear
// NOTE(review): substr(sitc,1,4) is the first four characters of the code, so
// this comparison with "0" is presumably never true for 4-character codes and
// nothing is dropped here -- confirm whether substr(sitc2,4,1) was intended.
drop if substr(sitc,1,4)=="0"
keep con lib sitc2
rename sitc2 sitc2_4d
sort sitc2_4d
save rauch_sitc2_4digit.dta, replace

// Product ranking file restricted to year 2003, saved as a lookup keyed on hs4.
import delimited hs02_all_ranking_en.csv, clear 
keep if year==2003 
rename id hs4 
// Zero-pad to 4 characters: a plain tostring turns a numeric 101 into "101",
// which can never match the key built later as substr(hs6,1,4) (e.g. "0101").
// NOTE(review): assumes id is a numeric HS4 code -- confirm against the csv.
// If id is already a string, tostring leaves it untouched.
tostring hs4, replace format(%04.0f)
sort hs4 
save pci_hs4, replace

// Load rs_hs02_19962006 and keep a copy of the product code under the name hs6.
use $rspath/rs_hs02_19962006 , clear
generate hs6 = hs6_2002

// Add the variables of product_code_baci02.dta; rows found only in the using
// file are discarded and no merge indicator is kept.
sort hs6
merge 1:1 hs6_2002 using product_code_baci02.dta, keep(master match) nogenerate

// Add the variables of hs2002_to_sitc2.dta (1:m: one product may be matched to
// several rows of the using file); using-only rows are discarded.
sort hs6
merge 1:m hs6_2002 using hs2002_to_sitc2.dta, keep(master match) nogenerate

// First pass: match on the 4-digit code. The merge indicator is stored as _mm
// so that it survives the second merge below.
sort sitc2_4d
merge m:1 sitc2_4d using rauch_sitc2_4digit.dta, generate(_mm) // Rauch measure
drop if _mm==2
rename (lib con) (lib_34 con_34)

// Second pass: match on the 3-digit code and use its values only for the rows
// that had no 4-digit match (_mm==1).
sort sitc2_3d
merge m:1 sitc2_3d using rauch_sitc2_3digit.dta // Rauch measure
replace con_34 = con if _mm==1
replace lib_34 = lib if _mm==1
drop if _merge==2
// Removes both merge indicators (every variable whose name starts with _m).
drop _m*


// Back to one row per hs6 (required by the 1:1 merges below), then add the
// variables of isic.dta.
duplicates drop hs6, force 
sort hs6 
merge 1:1 hs6 using isic.dta 
drop if _merge==2 
drop _merge 

// industry_code = first three characters of i2code (converted to string first).
tostring i2code, replace 
generate industry_code = substr(i2code,1,3)
sort industry_code
merge m:1 industry_code using contract_intensity_ISIC_1997.dta // Nunn's measure of relationship specificity 
// Discard industries present only in the using file, as after every other
// merge: such rows have an empty hs6, would end up in the saved dataset, and
// would break the 1:1 merge on hs6 below as soon as there are two of them.
drop if _merge==2 
drop _merge 

sort hs6 
merge 1:1 hs6 using upstreamness_hs6.dta // Antras et al. measure of upstreamness
drop if _merge==2 
drop _merge 

// Coarser product keys: first four and first two characters of hs6.
generate hs4 = substr(hs6,1,4)
generate hs2 = substr(hs6,1,2)

// Each merge below keeps master and matched rows only and leaves no indicator.
sort hs4
merge m:1 hs4 using sigma_hs4_imbsmejean.dta, keep(master match) nogenerate // Imbs & Mejean measure of elasticity

sort hs4
merge m:1 hs4 using pci_hs4, keep(master match) nogenerate // Hausmann & Hidalgo measure of product complexity

// hs6_2002 is converted to numeric before merging with bec_hs2002
// (presumably the key is numeric in that file -- confirm).
destring hs6_2002, replace
sort hs6_2002
merge 1:1 hs6_2002 using bec_hs2002, keep(master match) nogenerate // Broad Economic Classification


//  2 - build variables 


// dif_rauch: 1 when lib_34 is "n", 0 for any other non-empty value, missing
// when lib_34 is empty. Must be built before lib_34 is relabelled below.
generate dif_rauch = (lib_34=="n") if lib_34!=""
// Natural logs of upstreamness and sigma.
generate lup = ln(upstreamness)
generate lsigma = ln(sigma)
// Replace the one-letter codes by readable category names.
replace lib_34 = "Homogenous" if lib_34=="w"
replace lib_34 = "Reference price" if lib_34=="r"
replace lib_34 = "Differentiated" if lib_34=="n"

// 3 - correlations with external product characteristics 

* broad economic classification 
// Two helper variables per BEC category: the median of rs_hs6_b (used to order
// the boxes) and the number of non-missing rs_hs6_b values (used to leave out
// categories with five observations or fewer). Both are dropped afterwards.
egen bec_median = median(rs_hs6_b), by(becproductdescription)
egen bec_n = count(rs_hs6_b) if rs_hs6_b != ., by(becproductdescription)
label variable rs_hs6_b "Relationship stickiness"
graph box rs_hs6_b if bec_n > 5, over(becproductdescription, sort(bec_median)) horizontal nooutsides
graph export "$outputpath/figureOA11.pdf", replace
drop bec_median bec_n

* correlation with several measures 

// Covariates reported in the table: one spreadsheet row each, starting at row 2.
// Defined once so that the correlation loop, the regression and the
// coefficient loop cannot get out of sync.
local covars dif_rauch frac_lib_not_homog lup lsigma pci

// Path quoted so that an output directory containing spaces still works.
putexcel set "$outputpath/table3.xls", replace

// Column B: variable name; column C: its correlation with rs_hs6_b.
local j = 2
foreach i of local covars {
	putexcel B`j' = "`i'"
	corr rs_hs6_b `i'
	local rho = r(rho)
	putexcel C`j' = `rho'
	local j = `j' + 1
}

// Columns D and E: coefficient and robust standard error from the joint
// regression of rs_hs6_b on all covariates.
reg rs_hs6_b `covars', robust
local j = 2
foreach i of local covars {
	putexcel D`j' = _b[`i']
	putexcel E`j' = _se[`i']
	local j = `j' + 1
}

// Summary rows: one blank row after the last covariate, then the number of
// observations and the R2 (rows 8 and 9 with the five covariates above).
local row_n = `j' + 1
local row_r2 = `j' + 2
local num = e(N)
putexcel D`row_n' = `num'
putexcel B`row_n' = "Observations"
local num = e(r2)
putexcel D`row_r2' = `num'
putexcel B`row_r2' = "R2" 

save "$rspath/rs_hs02_19962006_controls", replace 

// From the firm-level file: count the non-missing values of export in each
// id_conc x year x month cell, then average these counts within id_conc.
use $firmpath/new_$dataset, clear
collapse (count) nbr = export, by(id_conc year month)
collapse (mean) nbr, by(id_conc)
// Attach rs_baseline_id_conc19962006 (several rows per id_conc allowed in the
// using file); the merge indicator _merge is left in the data.
merge 1:m id_conc using $rspath/rs_baseline_id_conc19962006
generate lnbr = ln(nbr)
// Pairwise correlations between nbr, its log and rs_hs6_se.
pwcorr nbr lnbr rs_hs6_se

/* example of sticky products with a high/low level of upstreamness or input specificity 

foreach i in rs_hs6_b lup pci frac_lib_not_homog {
	xtile decile_`i'=`i', n(10)
}

br rs_hs6_b upstreamness shortdescription hs6_2002 hs2002productdescription dif_rauch if decile_lup==10 & decile_rs_hs6_b==1
br rs_hs6_b upstreamness shortdescription hs6_2002 hs2002productdescription dif_rauch  if decile_lup==10 & decile_rs_hs6_b==10
br rs_hs6_b pci shortdescription hs6_2002 hs2002productdescription dif_rauch if decile_pci==10 & decile_rs_hs6_b==1
br rs_hs6_b frac_lib_not_homog shortdescription hs6_2002 hs2002productdescription 	dif_rauch  if decile_frac_lib_not_homog==10 & decile_rs_hs6_b==1
br rs_hs6_b dif_rauch shortdescription hs6_2002 hs2002productdescription if dif_rauch==1  & decile_rs_hs6_b==1
